# -*- coding: UTF-8 -*-
from urllib import request
import re

def getdata():
    """Download the GDUT home page and return the HTML of each news section.

    The page is requested with browser-like headers, decoded as UTF-8, and
    searched for spans that start at a ``<DIV class="news-...">`` tag and end
    at the next ``<!--#endeditable-->`` marker.

    Returns:
        list[str]: the text captured between each opening tag and its end
        marker, in page order. Empty if nothing matches.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
        UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    url = 'http://www.gdut.edu.cn/'
    # Headers imitate a desktop Firefox request.
    send_headers = {
        'Host': 'www.gdut.edu.cn',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Connection': 'keep-alive',
    }
    req = request.Request(url, headers=send_headers)
    # The context manager closes the HTTP response even if read/decode fails.
    with request.urlopen(req) as response:
        html = response.read().decode('utf-8')
    # '.' in the pattern below does not match newlines, so CRLF line breaks
    # are removed to let one section span several source lines. Only CRLF is
    # stripped; a bare LF is left in place.
    html = html.replace('\r\n', '')
    reg = r'<DIV class="news-.*?">(.*?)<!--#endeditable-->'
    return re.findall(reg, html)
def go():
    """Scrape the news sections and append one CSV row per headline to 1.csv.

    For every section returned by getdata(), the value of its first
    ``tagname="..."`` attribute is used as the category, and every
    ``<li><a href="..." title="..."`` link in the section becomes a row of
    the form ``tagname,title,url``. Each row is printed to stdout and
    appended to ``1.csv`` through write_file().

    Sections that contain no ``tagname`` attribute are skipped rather than
    aborting the whole run with an IndexError.
    """
    # Local imports keep this function self-contained; both are stdlib.
    import csv
    import io

    # Compiled once because the same patterns are applied to every section.
    tag_re = re.compile(r'tagname="(.*?)"')
    link_re = re.compile(r'<li><a href="(.*?)" title="(.*?)"')

    for section in getdata():
        tag_match = tag_re.search(section)
        if tag_match is None:
            # No category label in this section: nothing sensible to emit.
            continue
        tagname = tag_match.group(1)

        for href, title in link_re.findall(section):
            # csv.writer quotes fields that contain commas or quotes, so a
            # headline with a comma no longer corrupts the row. With
            # lineterminator='\n' plain fields come out exactly as
            # "tagname,title,url\n".
            row_buffer = io.StringIO()
            csv.writer(row_buffer, lineterminator='\n').writerow(
                [tagname, title, href]
            )
            content = row_buffer.getvalue()
            print(content)
            write_file('1.csv', content)
def write_file(filename, content):
    """Append *content* to the text file *filename*, creating it if missing.

    Args:
        filename: path of the file to append to.
        content: text to write; it is written as-is, with no newline added.

    Raises:
        OSError: if the file cannot be opened or written.
    """
    # The encoding is fixed to UTF-8 so non-ASCII text is written the same
    # way on every platform instead of depending on the locale default.
    # The with-block closes the file even if the write raises.
    with open(filename, 'a', encoding='utf-8') as fn:
        fn.write(content)

# Script entry point: run the scraper only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    go()
